When it comes to GCN, I have three ways to write it
1. A Brief Introduction to DGL
DGL (Deep Graph Library) is a Python package for deep learning on graph-structured data. It runs on top of frameworks such as PyTorch and provides both low-level message-passing primitives and ready-made layers such as GraphConv.
2. Preparation
import time
import math
import dgl
import numpy as np
import torch
import torch.nn as nn
from dgl.data import citation_graph as citegrh
from dgl import DGLGraph
import dgl.function as fn
import networkx as nx
import torch.nn.functional as F
from dgl.nn import GraphConv
# from dgl.nn.pytorch import GraphConv
# from dgl.nn.pytorch.conv import GraphConv
print(dgl.__version__)
print(nx.__version__)
0.4.3
2.3
3. GCN
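All three versions implement the same layer, the graph convolution of Kipf and Welling; for reference, the propagation rule is:

% GCN propagation rule (Kipf & Welling, 2017):
% \tilde{A} = A + I_N is the adjacency matrix with self-loops added,
% \tilde{D} its degree matrix, W^{(l)} the trainable weights, \sigma the activation
H^{(l+1)} = \sigma\!\left( \tilde{D}^{-1/2}\, \tilde{A}\, \tilde{D}^{-1/2}\, H^{(l)} W^{(l)} \right)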
3.1 First version

class GCN(nn.Module):
    def __init__(self,
                 g,
                 in_feats,
                 n_hidden,
                 n_classes,
                 n_layers,
                 activation,
                 dropout):
        super(GCN, self).__init__()
        self.g = g
        self.layers = nn.ModuleList()
        # input layer
        self.layers.append(GraphConv(in_feats, n_hidden, activation=activation))
        # hidden layers
        for i in range(n_layers - 1):
            self.layers.append(GraphConv(n_hidden, n_hidden, activation=activation))
        # output layer
        self.layers.append(GraphConv(n_hidden, n_classes))
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, features):
        h = features
        for i, layer in enumerate(self.layers):
            if i != 0:
                h = self.dropout(h)
            h = layer(self.g, h)
        return h
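A minimal usage sketch for this version (not in the original post; 1433 features and 7 classes are Cora's sizes from the training section below, the remaining values are illustrative assumptions):

# Sketch: 1433-d inputs and 7 classes match Cora (see section 4);
# hidden size, depth, and dropout here are illustrative assumptions.
model = GCN(g,                 # a prebuilt DGLGraph
            in_feats=1433,
            n_hidden=16,
            n_classes=7,
            n_layers=2,        # builds n_layers + 1 GraphConv layers in total
            activation=F.relu,
            dropout=0.5)
logits = model(features)       # features: (num_nodes, 1433) FloatTensor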
3.2 Second version
3.2.1 ndata
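The snippets in this subsection follow DGL's feature-access API and assume a graph and feature tensor already exist. A minimal setup they could run against (my assumption, not from the original post) is:

import dgl
import torch as th

# Assumed setup: a 10-node star graph (edges i -> 0) with 3-d node
# features and 2-d edge features, using the DGL 0.4-style mutable API.
g = dgl.DGLGraph()
g.add_nodes(10)
g.add_edges(list(range(1, 10)), 0)  # nine edges: 1 -> 0, ..., 9 -> 0
x = th.randn(10, 3)
g.edata['w'] = th.randn(9, 2)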
g.ndata['x'] = x
g.ndata['x'][[0, 1, 2]] = th.zeros(3, 3)
g.ndata['x'][th.tensor([0, 1, 2])] = th.randn((3, 3))
# Access edge set with IDs in integer, list, or integer tensor
g.edata['w'][1] = th.randn(1, 2)
g.edata['w'][[0, 1, 2]] = th.zeros(3, 2)
g.edata['w'][th.tensor([0, 1, 2])] = th.zeros(3, 2)
# You can get the edge ids by giving endpoints, which are useful for accessing the features.
g.edata['w'][g.edge_id(1, 0)] = th.ones(1, 2) # edge 1 -> 0
g.edata['w'][g.edge_ids([1, 2, 3], [0, 0, 0])] = th.ones(3, 2) # edges [1, 2, 3] -> 0
# Use edge broadcasting whenever applicable.
g.edata['w'][g.edge_ids([1, 2, 3], 0)] = th.ones(3, 2) # edges [1, 2, 3] -> 0
3.2.2 UDFs
def gcn_msg(edge):
    # message: scale the source node's features by the source norm
    msg = edge.src['h'] * edge.src['norm']
    return {'m': msg}

def gcn_reduce(node):
    # Note: messages are stacked along the second dimension of `mailbox`;
    # the first dimension is the number of messages.
    accum = torch.sum(node.mailbox['m'], dim=1) * node.data['norm']
    return {'h': accum}
class NodeApplyModule(nn.Module):
    def __init__(self, out_feats, activation=None, bias=True):
        super(NodeApplyModule, self).__init__()
        if bias:
            self.bias = nn.Parameter(torch.Tensor(out_feats))
        else:
            self.bias = None
        self.activation = activation
        self.reset_parameters()

    def reset_parameters(self):
        if self.bias is not None:
            stdv = 1. / math.sqrt(self.bias.size(0))
            self.bias.data.uniform_(-stdv, stdv)

    def forward(self, nodes):
        h = nodes.data['h']
        if self.bias is not None:
            h = h + self.bias
        if self.activation:
            h = self.activation(h)
        return {'h': h}
These three callbacks plug into DGLGraph.update_all, whose signature in DGL 0.4 is:

update_all(message_func='default',
           reduce_func='default',
           apply_node_func='default')
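Putting the pieces together, a single full-graph message-passing step with these callbacks might look like this (a sketch under the assumption that g.ndata['h'] and g.ndata['norm'] are already populated, as the GCNLayer below guarantees):

# Sketch: one message-passing pass with the UDFs above.
# Assumes g.ndata['h'] (16-d, to match the bias) and g.ndata['norm'] exist.
node_update = NodeApplyModule(out_feats=16, activation=F.relu)
g.update_all(gcn_msg,       # message_func: compute per-edge messages
             gcn_reduce,    # reduce_func: aggregate each node's mailbox
             node_update)   # apply_node_func: add bias, apply activation
h = g.ndata['h']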
3.2.3 GCNLayer
class GCNLayer(nn.Module):
    def __init__(self,
                 g,
                 in_feats,
                 out_feats,
                 activation,
                 dropout,
                 bias=True):
        super(GCNLayer, self).__init__()
        self.g = g
        self.weight = nn.Parameter(torch.Tensor(in_feats, out_feats))
        if dropout:
            self.dropout = nn.Dropout(p=dropout)
        else:
            self.dropout = 0.
        self.node_update = NodeApplyModule(out_feats, activation, bias)
        self.reset_parameters()

    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)

    def forward(self, h):
        if self.dropout:
            h = self.dropout(h)
        self.g.ndata['h'] = torch.mm(h, self.weight)
        self.g.update_all(gcn_msg, gcn_reduce, self.node_update)
        h = self.g.ndata.pop('h')
        return h
class GCN(nn.Module):
    def __init__(self,
                 g,
                 in_feats,
                 n_hidden,
                 n_classes,
                 n_layers,
                 activation,
                 dropout):
        super(GCN, self).__init__()
        self.layers = nn.ModuleList()
        # input layer
        self.layers.append(GCNLayer(g, in_feats, n_hidden, activation, dropout))
        # hidden layers
        for i in range(n_layers - 1):
            self.layers.append(GCNLayer(g, n_hidden, n_hidden, activation, dropout))
        # output layer
        self.layers.append(GCNLayer(g, n_hidden, n_classes, None, dropout))

    def forward(self, features):
        h = features
        for layer in self.layers:
            h = layer(h)
        return h
3.3 Third version
The two UDFs from 3.2.2 can be replaced with DGL's built-in functions:

- dgl.function.copy_src(src, out): the message function simply copies the source node's features into the message, so the built-in copy_src can be used instead.
- dgl.function.sum(msg, out): the reduce function simply aggregates the messages in a node's mailbox, so the built-in sum can be used instead.
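A quick sketch of the swap (my reading, not from the original post): the built-in pair covers only the copy-and-sum part of the UDFs, while the multiplications by 'norm' and the bias/activation move into the layer itself, as the GCNLayer below shows.

# Sketch: built-ins as a drop-in for the copy + sum portion of the UDFs.
# The scaling by g.ndata['norm'] (source side and destination side) and
# the bias/activation now live in GCNLayer.forward instead of the UDFs.
g.update_all(fn.copy_src(src='h', out='m'),  # was gcn_msg, minus the src norm
             fn.sum(msg='m', out='h'))       # was gcn_reduce, minus the dst norm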
class GCNLayer(nn.Module):
    def __init__(self,
                 g,
                 in_feats,
                 out_feats,
                 activation,
                 dropout,
                 bias=True):
        super(GCNLayer, self).__init__()
        self.g = g
        self.weight = nn.Parameter(torch.Tensor(in_feats, out_feats))
        if bias:
            self.bias = nn.Parameter(torch.Tensor(out_feats))
        else:
            self.bias = None
        self.activation = activation
        if dropout:
            self.dropout = nn.Dropout(p=dropout)
        else:
            self.dropout = 0.
        self.reset_parameters()

    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.uniform_(-stdv, stdv)

    def forward(self, h):
        if self.dropout:
            h = self.dropout(h)
        h = torch.mm(h, self.weight)
        # normalization by square root of src degree
        h = h * self.g.ndata['norm']
        self.g.ndata['h'] = h
        self.g.update_all(fn.copy_src(src='h', out='m'),
                          fn.sum(msg='m', out='h'))
        h = self.g.ndata.pop('h')
        # normalization by square root of dst degree
        h = h * self.g.ndata['norm']
        # bias
        if self.bias is not None:
            h = h + self.bias
        if self.activation:
            h = self.activation(h)
        return h
Normalization is applied twice here, once by the square root of the source degree and once by that of the destination degree, which together correspond to the \tilde{D}^{-1/2}\,\tilde{A}\,\tilde{D}^{-1/2} factor in the GCN formula. The node apply function's work (bias and activation) has also been merged into GCNLayer itself.
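To see why scaling by norm before and after the copy/sum pass reproduces the symmetric normalization, write one layer for a single node i (a sketch; \tilde{A} is the adjacency matrix with self-loops added, \tilde{d}_i the degree of node i, activation omitted):

% one GCN layer, element-wise; h_j is the row-vector feature of node j
h_i^{(l+1)} = \tilde{d}_i^{-1/2} \sum_{j \in \mathcal{N}(i)} \tilde{d}_j^{-1/2}\, h_j^{(l)} W^{(l)}
% which is exactly the i-th row of the matrix form
H^{(l+1)} = \tilde{D}^{-1/2}\, \tilde{A}\, \tilde{D}^{-1/2}\, H^{(l)} W^{(l)}

The inner factor \tilde{d}_j^{-1/2} is the multiplication by norm before update_all (source side), the sum is fn.sum, and the outer \tilde{d}_i^{-1/2} is the multiplication by norm after it (destination side).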
class GCN(nn.Module):
    def __init__(self,
                 g,
                 in_feats,
                 n_hidden,
                 n_classes,
                 n_layers,
                 activation,
                 dropout):
        super(GCN, self).__init__()
        self.layers = nn.ModuleList()
        # input layer (no dropout on the raw features)
        self.layers.append(GCNLayer(g, in_feats, n_hidden, activation, 0.))
        # hidden layers
        for i in range(n_layers - 1):
            self.layers.append(GCNLayer(g, n_hidden, n_hidden, activation, dropout))
        # output layer
        self.layers.append(GCNLayer(g, n_hidden, n_classes, None, dropout))

    def forward(self, features):
        h = features
        for layer in self.layers:
            h = layer(h)
        return h
4. Training
gpu = -1              # -1 = CPU; a non-negative value selects a CUDA device
lr = 0.01
n_epochs = 200
n_hidden = 16         # number of hidden units
n_layers = 2          # builds n_layers + 1 layers: input + (n_layers - 1) hidden + output
weight_decay = 5e-4   # weight decay
dropout = 0.5         # dropout probability (missing from the original listing; 0.5 assumed, the usual value for Cora)
self_loop = True      # add self-loops to the graph
data = citegrh.load_cora()
features = torch.FloatTensor(data.features)
labels = torch.LongTensor(data.labels)
train_mask = torch.BoolTensor(data.train_mask)
val_mask = torch.BoolTensor(data.val_mask)
test_mask = torch.BoolTensor(data.test_mask)
in_feats = features.shape[1]
n_classes = data.num_labels
n_edges = data.graph.number_of_edges()
g = data.graph
if self_loop:
    g.remove_edges_from(nx.selfloop_edges(g))
    g.add_edges_from(zip(g.nodes(), g.nodes()))
g = DGLGraph(g)
if gpu < 0:
    cuda = False
else:
    cuda = True
    torch.cuda.set_device(gpu)
    features = features.cuda()
    labels = labels.cuda()
    train_mask = train_mask.cuda()
    val_mask = val_mask.cuda()
    test_mask = test_mask.cuda()
# symmetric normalization factor: norm_i = 1 / sqrt(in-degree of i)
degs = g.in_degrees().float()
norm = torch.pow(degs, -0.5)
norm[torch.isinf(norm)] = 0   # guard isolated nodes (degree 0)
if cuda:
    norm = norm.cuda()
g.ndata['norm'] = norm.unsqueeze(1)
model = GCN(g,
            in_feats,
            n_hidden,
            n_classes,
            n_layers,
            F.relu,
            dropout)
if cuda:
    model.cuda()
loss_fcn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),
                             lr=lr,
                             weight_decay=weight_decay)
def evaluate(model, features, labels, mask):
    model.eval()
    with torch.no_grad():
        logits = model(features)
        logits = logits[mask]
        labels = labels[mask]
        _, indices = torch.max(logits, dim=1)
        correct = torch.sum(indices == labels)
        return correct.item() * 1.0 / len(labels)
dur = []
for epoch in range(n_epochs):
    model.train()
    t0 = time.time()
    # forward
    logits = model(features)
    loss = loss_fcn(logits[train_mask], labels[train_mask])
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    dur.append(time.time() - t0)
    if epoch % 10 == 0:
        acc = evaluate(model, features, labels, val_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                            acc, n_edges / np.mean(dur) / 1000))
print()
acc = evaluate(model, features, labels, test_mask)
print("Test accuracy {:.2%}".format(acc))
Epoch 00000 | Time(s) 0.0178 | Loss 1.9446 | Accuracy 0.2100 | ETputs(KTEPS) 594.54
Epoch 00010 | Time(s) 0.0153 | Loss 1.7609 | Accuracy 0.3533 | ETputs(KTEPS) 689.33
Epoch 00020 | Time(s) 0.0150 | Loss 1.5518 | Accuracy 0.5633 | ETputs(KTEPS) 703.47
Epoch 00030 | Time(s) 0.0146 | Loss 1.2769 | Accuracy 0.5867 | ETputs(KTEPS) 721.28
Epoch 00040 | Time(s) 0.0143 | Loss 1.0785 | Accuracy 0.6567 | ETputs(KTEPS) 740.36
Epoch 00050 | Time(s) 0.0140 | Loss 0.8881 | Accuracy 0.7067 | ETputs(KTEPS) 754.21
Epoch 00060 | Time(s) 0.0138 | Loss 0.6994 | Accuracy 0.7533 | ETputs(KTEPS) 763.21
Epoch 00070 | Time(s) 0.0137 | Loss 0.6249 | Accuracy 0.7800 | ETputs(KTEPS) 770.54
Epoch 00080 | Time(s) 0.0137 | Loss 0.5048 | Accuracy 0.7800 | ETputs(KTEPS) 772.31
Epoch 00090 | Time(s) 0.0136 | Loss 0.4457 | Accuracy 0.7867 | ETputs(KTEPS) 778.78
Epoch 00100 | Time(s) 0.0135 | Loss 0.4167 | Accuracy 0.7800 | ETputs(KTEPS) 782.25
Epoch 00110 | Time(s) 0.0134 | Loss 0.3389 | Accuracy 0.8000 | ETputs(KTEPS) 786.52
Epoch 00120 | Time(s) 0.0134 | Loss 0.3777 | Accuracy 0.8100 | ETputs(KTEPS) 789.85
Epoch 00130 | Time(s) 0.0133 | Loss 0.3307 | Accuracy 0.8133 | ETputs(KTEPS) 792.00
Epoch 00140 | Time(s) 0.0133 | Loss 0.2542 | Accuracy 0.7933 | ETputs(KTEPS) 794.13
Epoch 00150 | Time(s) 0.0133 | Loss 0.2937 | Accuracy 0.8000 | ETputs(KTEPS) 795.73
Epoch 00160 | Time(s) 0.0132 | Loss 0.2944 | Accuracy 0.8333 | ETputs(KTEPS) 797.04
Epoch 00170 | Time(s) 0.0132 | Loss 0.2161 | Accuracy 0.8167 | ETputs(KTEPS) 799.74
Epoch 00180 | Time(s) 0.0132 | Loss 0.1972 | Accuracy 0.8200 | ETputs(KTEPS) 801.31
Epoch 00190 | Time(s) 0.0131 | Loss 0.2339 | Accuracy 0.8167 | ETputs(KTEPS) 802.92
5. Conclusion

References:
- DGL on GitHub
- The official DGL documentation
- 《深度学习——Xavier初始化方法》 (on Xavier initialization)
- 《DGL 作者答疑!关于 DGL 你想知道的都在这里》, by 周金晶 (Zhou Jinjing)